This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document.
# install.packages("erp.easy")
# erp.easy: loading, plotting, and measuring ERP data; dplyr: data manipulation.
library(erp.easy)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# Locate the folder for the EEG output files (.txt) for old and new nets, replace the file location below with the one in your local device:
path_newnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/newnets/"
path_oldnets <- "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/oldnets/"
# Enter the number of participants in each group:
subs_new <- 57
subs_old <- 11
# Load data into dataframes for each condition separately (the exported .txt files appear separately for each condition):
# load.data() arguments: folder, condition label, n participants, epoch start,
# epoch end (presumably -100 to 999 ms relative to stimulus onset — confirm
# against the erp.easy load.data() documentation).
neg_go <- load.data(path_newnets,"NegGo", subs_new, -100, 999)
neg_nogo <- load.data(path_newnets,"NegNoGo", subs_new, -100, 999)
neut_go <- load.data(path_newnets,"NeutGo", subs_new, -100, 999)
neut_nogo <- load.data(path_newnets,"NeutNoGo", subs_new, -100, 999)
# Combine all conditions together into a single dataframe:
combo_new <- rbind.data.frame(neg_go, neg_nogo, neut_go, neut_nogo)
# Convert character columns (e.g. Subject, Trial Type) to factors.
combo_new <- as.data.frame(unclass(combo_new), stringsAsFactors=TRUE)
# Repeat for old nets:
neg_go_old <- load.data(path_oldnets,"NegGo", subs_old, -100, 999)
neg_nogo_old <- load.data(path_oldnets,"NegNoGo", subs_old, -100, 999)
neut_go_old <- load.data(path_oldnets,"NeutGo", subs_old, -100, 999)
neut_nogo_old <- load.data(path_oldnets,"NeutNoGo", subs_old, -100, 999)
# Stack the four conditions and convert character columns to factors,
# matching the treatment of combo_new above.
combo_old <- rbind.data.frame(neg_go_old, neg_nogo_old, neut_go_old, neut_nogo_old)
combo_old <- as.data.frame(unclass(combo_old),stringsAsFactors=TRUE)
# We are only interested in frontal P2 (positive) and N2 (negative)
# Posterior P3 and Frontal P3
# Electrode (channel) column names differ between the two net types,
# so a separate channel list is kept for each.
p2n2_newnets <- c("V4","V5","V10","V11","V12","V16","V18","V19")
p2n2_oldnets <- c("V4","V5","V10","V11","V12","V16","V19", "V20")
p3_newnets <- c("V54","V61","V62","V67","V72","V77","V78","V79")
p3_oldnets <- c("V54","V61","V62","V67","V68","V73","V78","V79","V80")
#Create average waveform plots for each subject in a single, multiplot window
# NOTE(review): only the new-net data (combo_new) is plotted here;
# combo_old is never passed to mosaic() — confirm that is intended.
mosaic(combo_new, p2n2_newnets, cols = 3, rows = 2)
mosaic(combo_new, p3_newnets, cols = 3, rows = 2)
# CODE BELOW GETS ALL THE MEASURES (N2, P2, P3) FROM OLD AND NEW NET
DATA, COMBINE THEM TOGETHER AND IT SAVES THE DATA INTO A FINAL COMBO
SPREADSHEET: # CHECK THE WINDOW RANGE FOR EACH ERP COMPONENT AND ADJUST
AS NEEDED! * The dependent measures we use are mean amplitude
(microvolts) and latency (in ms) for statistical analysis. * m.measures
gives the mean amplitude for a specific window and its std. dev. along
with its graph. specify lgnd = “n” (no legend) if you do not want the
legend * p.measures calculates local or simple peak amplitude and
latency for each condition in the data frame. Use latency only from
p.measures * pol = The polarity of peaks to favor when multiple peaks
are present. Entering “pos” will locate the most positive peak. Entering
“neg” will locate the most negative peak. Entering “abs” will find the
greatest deviation from 0, regardless of the polarity.
# Get the measures from the NEW net:
# m.measures() = mean amplitude (+ standard deviation) within the ms window;
# p.measures() = peak amplitude/latency, favoring the polarity given by `pol`.
MeanAmp_P2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(150,300)))
MeanAmp_N2_newnets <- (m.measures(combo_new, p2n2_newnets, window=c(350,550)))
MeanAmp_P3_newnets <- (m.measures(combo_new, p3_newnets, window=c(450,750)))
Latency_P2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(150,300), pol="pos"))
Latency_N2_newnets <- (p.measures(combo_new, p2n2_newnets, window=c(350,550), pol="neg"))
Latency_P3_newnets <- (p.measures(combo_new, p3_newnets, window=c(450,750), pol="pos"))
# Combine all results together
# You can use full_join or merge in the same way:
# The `suffix` argument only applies where column names collide, so the P3
# columns keep their bare names here and are renamed explicitly just below.
total_new <- MeanAmp_P2_newnets %>%
full_join(MeanAmp_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(MeanAmp_P3_newnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_P2_newnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_N2_newnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(Latency_P3_newnets, by = c("Subject", "Trial Type"))
# rename the variables without any suffix here:
# rename does not work properly unless you specify the package - some conflict:
# (presumably another attached package masks rename — dplyr:: disambiguates)
total_new <- total_new %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
"Standard Dev.P3" = "Standard Dev",
"Peak Latency.P3" = "Peak Latency",
"Peak Amplitude.P3" = "Peak Amplitude")
# Get the measures from the OLD net (same windows and polarities as above):
MeanAmp_P2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(150,300)))
MeanAmp_N2_oldnets <- (m.measures(combo_old, p2n2_oldnets, window=c(350,550)))
MeanAmp_P3_oldnets <- (m.measures(combo_old, p3_oldnets, window=c(450,750)))
Latency_P2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(150,300), pol="pos"))
Latency_N2_oldnets <- (p.measures(combo_old, p2n2_oldnets, window=c(350,550), pol="neg"))
Latency_P3_oldnets <- (p.measures(combo_old, p3_oldnets, window=c(450,750), pol="pos"))
# Combine all results together
# (suffixes only apply on name collisions; bare P3 names renamed below)
total_old <- MeanAmp_P2_oldnets %>%
full_join(MeanAmp_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(MeanAmp_P3_oldnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_P2_oldnets, by = c("Subject", "Trial Type")) %>%
full_join(Latency_N2_oldnets, by = c("Subject", "Trial Type"), suffix = c(".P2", ".N2")) %>%
full_join(Latency_P3_oldnets, by = c("Subject", "Trial Type"))
# rename the variables without any suffix here:
total_old <- total_old %>% dplyr::rename("Mean Amplitude.P3" = "Mean Amplitude",
"Standard Dev.P3" = "Standard Dev",
"Peak Latency.P3" = "Peak Latency",
"Peak Amplitude.P3" = "Peak Amplitude")
# Combine old and new net data together.
# A full_join() without `by` would silently join on ALL shared columns
# (every measure value) and only stacks the tables by accident — it would
# merge rows if any subject's full measure set ever repeated. bind_rows()
# states the real intent: row concatenation of two identically-shaped tables.
# (The "## Joining, by = ..." message below no longer applies.)
combo <- dplyr::bind_rows(total_new, total_old)
## Joining, by = c("Subject", "Trial Type", "Standard Dev.P2", "Mean
## Amplitude.P2", "Standard Dev.N2", "Mean Amplitude.N2", "Standard Dev.P3", "Mean
## Amplitude.P3", "Peak Latency.P2", "Peak Amplitude.P2", "Peak Latency.N2", "Peak
## Amplitude.N2", "Peak Latency.P3", "Peak Amplitude.P3")
# Making sure we are only adding new rows - participants
# (row counts must add up exactly, and the column sets must be identical)
nrow(total_new) + nrow(total_old) == nrow(combo)
## [1] TRUE
ncol(total_old) == ncol(total_new)
## [1] TRUE
ncol(total_new) == ncol(combo)
## [1] TRUE
# Remove Grand Ave from data, order by subject name and reset the index:
combo <- combo[!(combo$Subject=="Grand Avg"),]
combo <- with(combo, combo[order(Subject) , ])
rownames(combo) <- NULL
# List the distinct subject IDs that remain (sanity check on merged groups):
unique(combo[c("Subject")])
## Subject
## 1 AE050318
## 5 AH101121
## 9 AK022218
## 13 AK102221
## 17 AL041819
## 21 AL090917
## 25 AN122116
## 29 AO013020
## 33 AS110816
## 37 AT051818
## 41 AW040217
## 45 AW110418
## 49 BW071018T2
## 53 CC102318T2
## 57 CF101019
## 61 CL040218
## 65 CM120919
## 69 DJ052417
## 73 EC041817
## 77 EG030618
## 81 EM100417
## 85 ES031519
## 89 ES032018
## 93 FW121816
## 97 GB012817
## 101 GR071921
## 105 HC102117
## 109 HC111621
## 113 HH061919
## 117 JA092118
## 121 JG091119T3
## 125 JJ011018
## 129 JK032119T3
## 133 JS121321
## 137 JT051618
## 141 KA022017
## 145 KE050718
## 149 KT072319
## 153 LB012619
## 157 LG100721
## 161 LS100617
## 165 LW102219T3
## 169 MM040119
## 173 MR091118T2
## 177 MS102319
## 181 NL041119
## 185 OG013016
## 189 PB021519
## 193 PW030417
## 197 RB101619
## 201 RH100218
## 205 RK040219T3
## 209 RS030518
## 213 RT032219
## 217 SA072518
## 221 SB111121
## 225 SK041519
## 229 SL090418
## 233 SP010219
## 237 ST100121
## 241 TA051917
## 245 TE062818
## 249 TS011518
## 253 WB110221
## 257 WF080417
## 261 WH022219
## 265 WK011122
## 269 WS051018
# CREATE A NEW COLUMN by taking the difference between N2-P2
# (difference measures; negative values mean N2 is more negative than P2)
combo$`N2P2avg` <- combo$`Mean Amplitude.N2` - combo$`Mean Amplitude.P2`
combo$`N2P2peak` <- combo$`Peak Amplitude.N2` - combo$`Peak Amplitude.P2`
# Write to a csv file:
# write.csv(combo, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/combo.csv")
head(combo)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegGo 3.021852 -6.470334 3.907745
## 2 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 3 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 4 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 5 AH101121 NegGo 2.182505 -7.668577 1.576681
## 6 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -10.871784 1.377677 3.505749 220
## 2 2.023796 2.252998 3.432536 256
## 3 -15.687000 1.537678 4.367052 228
## 4 -7.857202 2.118532 7.358546 204
## 5 -24.930473 4.063984 18.547604 232
## 6 -24.144933 4.750914 14.173249 244
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -2.7612018 536 -5.187617 648
## 2 11.4069294 452 -2.268911 596
## 3 -7.3050607 376 -20.651664 588
## 4 -4.5040871 452 -7.270952 720
## 5 -5.8547045 492 -26.713190 528
## 6 -0.5727539 476 -25.672664 508
## Peak Amplitude.P3 N2P2avg N2P2peak
## 1 5.088120 -4.401450 -2.426415
## 2 6.870649 -1.507826 -13.675841
## 3 7.176728 -5.360848 -13.346603
## 4 10.265959 2.519769 -2.766865
## 5 23.899982 -17.261896 -20.858486
## 6 18.808306 -20.115323 -25.099910
# Load DataSet:
intake <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/CognitiveEmotionalLi-IntakeStutteringLang.csv')
# Subject IDs include the visit number in the combo dataset if it is NOT the first time point.
# Do the same here: Combine visit number with subject and create a new Subject variable so that it matches the combo:
# e.g. part_id_status "XX010101" at visit 3 becomes "XX010101T3".
intake <- intake %>%
mutate(Subject = ifelse(visitnumber != 1, paste0(part_id_status, "T", visitnumber), part_id_status))
# Calculate the month difference using BIRTHDATE and CVE date to make sure the autocalculator is correct:
# Install and load the lubridate package
# install.packages("lubridate")
# library(lubridate)
# the %/% operator is used to perform the floor division of the interval by months(1), which represents one month.
# intake$month_diff <- interval(intake$date_birth, intake$cve_date) %/% months(1)
# print(intake$month_diff)
# print(round(intake$calculator_age_cve))
# Not a big difference between the hand calculated and auto calculated field. Let's use "calculator_age_cve"
# Create a new variable representing final sldper100words ("disfluency_sldper100words_final) by taking disfluency_sldper100words from CVD as primary, but in the case that this data is missing, take the disfluency scores from CVE:
intake <- intake %>%
mutate(disfluency_sldper100words_final = ifelse(!is.na(disfluency_sldper100words), disfluency_sldper100words, disfluency_sldper100words_cve))
# Create a final talker group variable ("talkergroup_final) using disfluency_sldper100words_final and talker group based on parent report:
# 1: CWS, 0:CWNS, 9:unidentified
# NOTE(review): ifelse() propagates NA — rows where the disfluency score is NA
# (and the parent report does not force a 1) come out NA instead of 9;
# those rows are detected and patched by hand further below.
intake <- intake %>%
mutate(talkergroup_final = ifelse((disfluency_sldper100words_final >= 3 | calculator_talkergroup_parent == 1), 1,
ifelse((disfluency_sldper100words_final < 3 & calculator_talkergroup_parent == 0), 0, 9)))
# ignoring the NA's:
# intake <- intake %>%
# mutate(talkergroup_overall_final = ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == 1) |
# (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 1), 1,
# ifelse((!is.na(talkergroup_disfluency_final) & talkergroup_disfluency_final == &
# (!is.na(calculator_talkergroup_parent) & calculator_talkergroup_parent == 0), 0, 9)))
# Take the relevant columns from intake dataset
# You may update this to take more columns into the dataset:
intake <- subset(intake, select=c('Subject','calculator_age_cve','calculator_gender_cve',
'calculator_talkergroup_parent','tso_calculated', 'disfluency_sldper100words','ssi_total',
'disfluency_sldper100words_final', 'talkergroup_final',
"gfta_standard", "ppvt_standard", "evt_standard",
"teld_rec_standard","teld_exp_standard", "teld_spokenlang_standard",
'cve_comments','comments_tasks'))
# Merge with the main dataset using SUBJECT
# (merge() defaults to an inner join: subjects absent from `intake`
# are silently dropped from FULL — verify none are expected to be missing)
FULL <- merge(combo, intake, by=c("Subject"))
head(FULL)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 2 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 3 AE050318 NegGo 3.021852 -6.470334 3.907745
## 4 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 5 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## 6 AH101121 NegGo 2.182505 -7.668577 1.576681
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 2.023796 2.252998 3.432536 256
## 2 -7.857202 2.118532 7.358546 204
## 3 -10.871784 1.377677 3.505749 220
## 4 -15.687000 1.537678 4.367052 228
## 5 -24.144933 4.750914 14.173249 244
## 6 -24.930473 4.063984 18.547604 232
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 11.4069294 452 -2.268911 596
## 2 -4.5040871 452 -7.270952 720
## 3 -2.7612018 536 -5.187617 648
## 4 -7.3050607 376 -20.651664 588
## 5 -0.5727539 476 -25.672664 508
## 6 -5.8547045 492 -26.713190 528
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 1 6.870649 -1.507826 -13.675841 38.1
## 2 10.265959 2.519769 -2.766865 38.1
## 3 5.088120 -4.401450 -2.426415 38.1
## 4 7.176728 -5.360848 -13.346603 38.1
## 5 18.808306 -20.115323 -25.099910 70.5
## 6 23.899982 -17.261896 -20.858486 70.5
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1 0 1 1.9
## 2 0 1 1.9
## 3 0 1 1.9
## 4 0 1 1.9
## 5 1 0 NA
## 6 1 0 NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1 12 23 12
## 2 12 23 12
## 3 12 23 12
## 4 12 23 12
## 5 0 6 0
## 6 0 6 0
## talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1 1 121 126 123 146
## 2 1 121 126 123 146
## 3 1 121 126 123 146
## 4 1 121 126 123 146
## 5 0 104 111 111 110
## 6 0 104 111 111 110
## teld_exp_standard teld_spokenlang_standard cve_comments
## 1 135 149
## 2 135 149
## 3 135 149
## 4 135 149
## 5 115 115
## 6 115 115
## comments_tasks
## 1
## 2
## 3
## 4
## 5 Good data, attentive child, but sleepy during the Reactivity task.
## 6 Good data, attentive child, but sleepy during the Reactivity task.
# Check the subject numbers with missing stuttering assessments:
# rows_with_null <- FULL[is.na(FULL$disfluency_sldper100words) | is.na(FULL$ssi_total) | is.na(FULL$ssi_severity), ]
# | is.na(FULL_2$calculator_talkergroup_parent) makes no difference
# unique(rows_with_null$Subject)
# Show the rows where talkergroup_final = 9 or NA :
# Renamed from the copy-pasted `short_RT_rows` — these rows have nothing to do
# with reaction time (that name is reused for the RT check further below, which
# reassigns it before any read, so this rename is safe for downstream code).
unlabeled_talkergroup_rows <- subset(FULL, talkergroup_final == 9 | is.na(talkergroup_final))
unlabeled_talkergroup_rows
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 145 LW102219T3 NegGo 4.047862 -11.3533653 1.226660
## 146 LW102219T3 NegNoGo 7.716179 -14.3407325 1.015503
## 147 LW102219T3 NeutGo 3.809460 -16.0253408 3.957634
## 148 LW102219T3 NeutNoGo 7.502369 -13.4832903 2.684751
## 193 SA072518 NegGo 1.044944 -1.8257751 1.919929
## 194 SA072518 NegNoGo 3.253022 -2.8618672 2.448918
## 195 SA072518 NeutGo 3.037777 -1.3041270 2.886428
## 196 SA072518 NeutNoGo 2.757350 -0.2626314 2.512955
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 145 -15.50399 0.9484819 0.7954588 160
## 146 -20.08492 0.9063114 2.8402350 160
## 147 -13.05854 2.4802956 -2.5216476 284
## 148 -16.74960 1.1207562 5.1640531 276
## 193 -10.16839 2.3822622 11.3534178 244
## 194 -18.03140 3.8740673 25.2926602 240
## 195 -8.41987 1.0071346 7.7124837 232
## 196 -10.71480 3.0017872 11.6209099 264
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 145 -6.9007481 488 -16.33392 488
## 146 -5.0478235 392 -20.30981 684
## 147 -21.1105926 352 -18.30229 456
## 148 -23.1688279 352 -22.34015 680
## 193 -0.2973903 516 -12.97347 612
## 194 3.1531559 444 -21.27522 676
## 195 2.5669589 444 -11.77583 492
## 196 5.1291300 440 -14.92553 568
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 145 2.524455 -4.150623 -9.4331692 79.8
## 146 4.480037 -5.744191 -15.2619911 79.8
## 147 2.975747 2.966805 2.8082985 79.8
## 148 6.883948 -3.266312 0.8286815 79.8
## 193 15.744950 -8.342619 -12.6760764 73.0
## 194 31.451418 -15.169536 -24.4283726 73.0
## 195 9.651857 -7.115743 -14.3427915 73.0
## 196 16.708875 -10.452173 -20.0546604 73.0
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 145 1 NA NA
## 146 1 NA NA
## 147 1 NA NA
## 148 1 NA NA
## 193 1 NA NA
## 194 1 NA NA
## 195 1 NA NA
## 196 1 NA NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 145 NA NA 0
## 146 NA NA 0
## 147 NA NA 0
## 148 NA NA 0
## 193 NA NA NA
## 194 NA NA NA
## 195 NA NA NA
## 196 NA NA NA
## talkergroup_final gfta_standard ppvt_standard evt_standard
## 145 NA NA NA NA
## 146 NA NA NA NA
## 147 NA NA NA NA
## 148 NA NA NA NA
## 193 NA NA NA NA
## 194 NA NA NA NA
## 195 NA NA NA NA
## 196 NA NA NA NA
## teld_rec_standard teld_exp_standard teld_spokenlang_standard
## 145 NA NA NA
## 146 NA NA NA
## 147 NA NA NA
## 148 NA NA NA
## 193 NA NA NA
## 194 NA NA NA
## 195 NA NA NA
## 196 NA NA NA
## cve_comments
## 145 Did fluency count abs CELF screener completed at visit.
## 146 Did fluency count abs CELF screener completed at visit.
## 147 Did fluency count abs CELF screener completed at visit.
## 148 Did fluency count abs CELF screener completed at visit.
## 193
## 194
## 195
## 196
## comments_tasks
## 145 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 146 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 147 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 148 The participant was attentive and Movement was relatively minimal. However, appeared to have low accuracy on Zoo task despite understanding the task. He showed vocal discomfort for each button press on the no-go trials. His accuracy is expected to be below %50.
## 193
## 194
## 195
## 196
# MANUALLY LABEL THE TALKERGROUP FOR NA's:
# LW102219T3 == 0 because there is a record of a disfluency count of 0, although no parent report available.
# SA072518 == 0, there is no indication in RedCap that the child stutters.
# Replace NA values for these specific subjects in one vectorized step
# (instead of one near-duplicate ifelse() per subject):
manually_labeled_cwns <- c("LW102219T3", "SA072518")
FULL$talkergroup_final <- ifelse(FULL$Subject %in% manually_labeled_cwns & is.na(FULL$talkergroup_final), 0, FULL$talkergroup_final)
# Making sure no 9 or NA remained:
any(FULL$talkergroup_final == 9 | is.na(FULL$talkergroup_final))
## [1] FALSE
# Create a new subset with ONLY good data, by removing those participants you identified as having bad data.
# A named vector plus %in% replaces the long comma-list of Subject != ... filters:
bad_data_subjects <- c("AE050318", "AL090917", "ES031519", "LS100617",
                       "LG100721", "MS102319", "PB021519", "RT032219")
combo_good <- dplyr::filter(combo, !Subject %in% bad_data_subjects)
head(combo_good)
## Subject Trial Type Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AH101121 NegGo 2.182505 -7.668577 1.576681
## 2 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## 3 AH101121 NeutGo 1.839147 -4.235501 2.891194
## 4 AH101121 NeutNoGo 3.776016 0.651977 2.494542
## 5 AK022218 NegGo 4.825410 8.927914 5.937896
## 6 AK022218 NegNoGo 3.586909 5.862981 5.437614
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -24.930473 4.063984 18.54760 232
## 2 -24.144933 4.750914 14.17325 244
## 3 -20.625652 4.391747 16.31334 224
## 4 -15.905247 5.252118 10.57876 236
## 5 -7.418664 1.763918 16.08891 204
## 6 -10.567050 6.287977 23.56859 204
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -5.8547045 492 -26.71319 528
## 2 -0.5727539 476 -25.67266 508
## 3 -1.3806822 540 -23.88108 508
## 4 6.3521030 452 -19.63251 588
## 5 16.4295335 488 -14.04309 648
## 6 11.5147669 452 -19.36042 628
## Peak Amplitude.P3 N2P2avg N2P2peak
## 1 23.89998 -17.26190 -20.85849
## 2 18.80831 -20.11532 -25.09991
## 3 21.79867 -16.39015 -22.50040
## 4 15.80878 -16.55722 -25.98461
## 5 19.63726 -16.34658 -30.47263
## 6 36.67178 -16.43003 -30.87518
# Load the file:
accuracy <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/Merged_Zoo_05.18.23.csv')
# Take only the relevant variables:
accuracy <- subset(accuracy, select=c('Name','VisitNumber','ShowStim.ACC','ShowStim.RESP','ShowStim.RT','StimTag'))
# Check out the class types for each variable.
sapply(accuracy, class)
## Name VisitNumber ShowStim.ACC ShowStim.RESP ShowStim.RT
## "character" "integer" "integer" "character" "integer"
## StimTag
## "character"
# For ShowStim.RESP response 4 is a "character", not integer.
print(class(accuracy$ShowStim.RESP))
## [1] "character"
# Convert character 4 for ShowStim.RESP to integer
# (left commented out on purpose: the accuracy coding below compares against
# the character "4", so no coercion is needed)
# accuracy$ShowStim.RESP <- as.integer(accuracy$ShowStim.RESP)
# Create a new ACCURACY column based on Go or NoGo conditions.
# ShowStim.RESP: "4" means the child pushed the button (accurate for Go),
# NA means no response (accurate for NoGo). 1 is accurate, 0 is inaccurate.
# Rewritten from nested case_when branches with `& TRUE` fallbacks and
# NA-dependent `== '4'` comparisons into explicit pressed/is-NoGo logic;
# the resulting 0/1 values are identical (non-"4" and NA responses both
# count as "not pressed", exactly as the fall-through branches did).
accuracy <- accuracy %>%
  filter(StimTag %in% c('negN', 'neuN', 'negG', 'neuG')) %>%
  mutate(
    pressed_button = !is.na(ShowStim.RESP) & ShowStim.RESP == '4',
    is_nogo_trial = StimTag %in% c('negN', 'neuN'),
    # NoGo trials are accurate when NOT pressed; Go trials when pressed.
    accuracy = ifelse(is_nogo_trial, as.numeric(!pressed_button), as.numeric(pressed_button))
  ) %>%
  dplyr::select(-pressed_button, -is_nogo_trial)
####YOU MIGHT DECIDE TO REMOVE ALL TRIALS WITH RT SHORTER THAN 148MS FOR THE ACCURACY CALCULATION#####
# Display the short RT rows and how many of them there are.
# Exclude NA reaction times explicitly: indexing with `[` on an NA condition
# injects phantom all-NA rows (visible as the <NA> rows in the old output)
# and inflates the count reported below.
short_RT_rows <- accuracy[!is.na(accuracy$ShowStim.RT) &
                            accuracy$ShowStim.RT < 148 &
                            accuracy$ShowStim.RT > 1, ]
head(short_RT_rows)
## Name VisitNumber ShowStim.ACC ShowStim.RESP ShowStim.RT StimTag
## 636 AO013020 1 0 4 117 neuN
## NA <NA> NA NA <NA> NA <NA>
## 2002 WF080417 1 0 4 59 neuN
## 2004 WF080417 1 1 4 75 neuG
## 2006 WF080417 1 0 4 136 neuN
## NA.1 <NA> NA NA <NA> NA <NA>
## accuracy
## 636 0
## NA NA
## 2002 0
## 2004 1
## 2006 0
## NA.1 NA
print(nrow(short_RT_rows))
## [1] 587
# describe(short_RT_rows$StimTag)
# THERE ARE 587 ROWS WITH RT LOWER THAN 148ms but ALL of them are NOGO trials anyways, so they are incorrect anyways.
# NO NEED TO CHANGE THE ACCURACY MEASURE ABOVE because Low RT is relevant only for Go trials.
# Create a subset of whole data set by excluding the very short RT trials.
# accuracy_filtered <- accuracy[!(accuracy$ShowStim.RT < 148 & accuracy$ShowStim.RT > 1), ]
# discrepancies <- accuracy$accuracy != accuracy$accuracy_filtered
# discrepancy_rows <- which(discrepancies)
# print(discrepancy_rows)
# create ACCURACY PERCENTAGE column
# (one row per subject x visit x condition; dplyr:: prefix avoids
# Hmisc's masked summarize)
accuracy_percent <- accuracy %>%
group_by(Name, VisitNumber, StimTag) %>%
dplyr::summarize(accuracy_percentage = mean(accuracy) * 100)
## `summarise()` has grouped output by 'Name', 'VisitNumber'. You can override
## using the `.groups` argument.
# calculate REACTION TIME for GO only
# (mean RT over accurate Go trials only; NoGo trials carry no meaningful RT)
reaction_time <- accuracy %>%
filter(accuracy == 1 & (StimTag == 'negG' | StimTag == 'neuG')) %>%
group_by(Name, VisitNumber,StimTag) %>%
dplyr::summarize(reaction_time = mean(ShowStim.RT))
## `summarise()` has grouped output by 'Name', 'VisitNumber'. You can override
## using the `.groups` argument.
# COMBINE accuracy_percent and reaction_time
# (full join keeps NoGo rows: they have accuracy but NA reaction_time)
eprime <- full_join(accuracy_percent, reaction_time, by=c("Name", "VisitNumber", "StimTag"))
# Combine visit number with subject and create a new Subject variable for eprime so that it matches the FULL
eprime <- eprime %>%
mutate(Subject = ifelse(VisitNumber != 1, paste0(Name, "T", VisitNumber), Name))
# Rename the labels for StimTags on eprime data
# (to match the Trial Type labels used by the ERP pipeline)
eprime <- eprime %>%
mutate(StimTag = recode(StimTag, "negG" = "NegGo", "negN" = "NegNoGo", "neuG" = "NeutGo", "neuN" = "NeutNoGo"))
# Drop name and Visitnumber from eprime
# Ungroup the dataframe first
# (grouping variables cannot be dropped while the tbl is still grouped)
eprime <- ungroup(eprime)
eprime <- eprime %>%
dplyr::select(-Name, -VisitNumber) # eprime <- select(eprime, -Name, -VisitNumber)
# Replace Trial Type in FULL with "StimTag" to be able to merge with eprime data
FULL <- FULL %>%
dplyr::rename("StimTag" = "Trial Type")
# COMBINE ALL!!
# (merge() defaults to an inner join on Subject x StimTag)
ZOO <- merge(FULL, eprime, by=c("Subject", "StimTag"))
head(ZOO)
## Subject StimTag Standard Dev.P2 Mean Amplitude.P2 Standard Dev.N2
## 1 AE050318 NegGo 3.021852 -6.470334 3.907745
## 2 AE050318 NegNoGo 5.962168 3.531623 2.249580
## 3 AE050318 NeutGo 2.577527 -10.326151 3.458130
## 4 AE050318 NeutNoGo 5.419626 -10.376971 2.866578
## 5 AH101121 NegGo 2.182505 -7.668577 1.576681
## 6 AH101121 NegNoGo 3.187342 -4.029610 1.060874
## Mean Amplitude.N2 Standard Dev.P3 Mean Amplitude.P3 Peak Latency.P2
## 1 -10.871784 1.377677 3.505749 220
## 2 2.023796 2.252998 3.432536 256
## 3 -15.687000 1.537678 4.367052 228
## 4 -7.857202 2.118532 7.358546 204
## 5 -24.930473 4.063984 18.547604 232
## 6 -24.144933 4.750914 14.173249 244
## Peak Amplitude.P2 Peak Latency.N2 Peak Amplitude.N2 Peak Latency.P3
## 1 -2.7612018 536 -5.187617 648
## 2 11.4069294 452 -2.268911 596
## 3 -7.3050607 376 -20.651664 588
## 4 -4.5040871 452 -7.270952 720
## 5 -5.8547045 492 -26.713190 528
## 6 -0.5727539 476 -25.672664 508
## Peak Amplitude.P3 N2P2avg N2P2peak calculator_age_cve
## 1 5.088120 -4.401450 -2.426415 38.1
## 2 6.870649 -1.507826 -13.675841 38.1
## 3 7.176728 -5.360848 -13.346603 38.1
## 4 10.265959 2.519769 -2.766865 38.1
## 5 23.899982 -17.261896 -20.858486 70.5
## 6 18.808306 -20.115323 -25.099910 70.5
## calculator_gender_cve calculator_talkergroup_parent tso_calculated
## 1 0 1 1.9
## 2 0 1 1.9
## 3 0 1 1.9
## 4 0 1 1.9
## 5 1 0 NA
## 6 1 0 NA
## disfluency_sldper100words ssi_total disfluency_sldper100words_final
## 1 12 23 12
## 2 12 23 12
## 3 12 23 12
## 4 12 23 12
## 5 0 6 0
## 6 0 6 0
## talkergroup_final gfta_standard ppvt_standard evt_standard teld_rec_standard
## 1 1 121 126 123 146
## 2 1 121 126 123 146
## 3 1 121 126 123 146
## 4 1 121 126 123 146
## 5 0 104 111 111 110
## 6 0 104 111 111 110
## teld_exp_standard teld_spokenlang_standard cve_comments
## 1 135 149
## 2 135 149
## 3 135 149
## 4 135 149
## 5 115 115
## 6 115 115
## comments_tasks
## 1
## 2
## 3
## 4
## 5 Good data, attentive child, but sleepy during the Reactivity task.
## 6 Good data, attentive child, but sleepy during the Reactivity task.
## accuracy_percentage reaction_time
## 1 84.16667 706.0297
## 2 57.50000 NA
## 3 90.83333 796.2385
## 4 72.50000 NA
## 5 99.16667 643.1176
## 6 87.50000 NA
# Persist the final combined dataset:
write.csv(ZOO, "/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv")
# ZOO <- read.csv(file = '/Users/aysuerdemir/Desktop/R workspace/ERP_Zoo/CrossSectional/Mix/ZOO.csv')
# Keep only rows with accuracy strictly above 75% (subset() drops NA matches).
ZOO_good <- subset(ZOO, accuracy_percentage > 75)
# Complement view for inspection. The !is.na() guard prevents `[` from
# injecting phantom all-NA rows when accuracy_percentage is NA.
# NOTE(review): rows with accuracy_percentage exactly 75 fall in NEITHER
# subset — confirm the intended boundary.
filtered_rows <- ZOO[!is.na(ZOO$accuracy_percentage) & ZOO$accuracy_percentage < 75, ]
nrow(ZOO)
## [1] 232
nrow(ZOO_good)
## [1] 163